library(readODS) #read ods files
library(readxl) #reads excel files
library(tidyverse) #Tidy packages
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr) #lots of functions - data manipulation
library(usethis) #look at and apply settings to Github actions
library(ggplot2) #pretty plots
library(ggthemes) #themes for ggplot
library(RColorBrewer) #color palettes
library(wesanderson) #color palettes from Wes Anderson films
library(scales) #axis formatting options
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(plotly) #interactive web graphs
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(networkD3) #interactive web network graphs
library(flipPlots) #plots (install with Github)

usethis::edit_r_environ() #set environment with Github token for API
## • Edit '/Users/kaydeebarker/.Renviron'
## • Restart R for changes to take effect

The National Atmospheric Emissions Inventory of the UK

Introduction to the inventory, with links kilotonnes of CO2 equivalents

# Map loop to download UK 2022 data from the UK Department for Energy Security and Net Zero
downloaded <- file.exists("UKGHG_2022.ods") #checks if file is downloaded in working directory
if(downloaded != T){ #if downloaded is not true
  map2("https://assets.publishing.service.gov.uk/media/65c0d54663a23d000dc821ca/final-greenhouse-gas-emissions-2022-by-source-dataset.ods", #update this link when new data available
                         "UKGHG_2022.ods", download.file)} else{print('data downloaded')} #name and download or print
## [1] "data downloaded"
#Read in UK 2022 data from the UK Department for Energy Security and Net Zero
GHG_UK22 <- read_ods(
  path = "UKGHG_2022.ods",
  sheet = 1, #define tab/sheet to read
  col_names = TRUE, #use header row for column names
  col_types = NULL, #guess data types
  na = "", #treat blank cells as NA
  skip = 0, #don't skip rows
  formula_as_formula = FALSE, #values only
  range = NULL, 
  row_names = FALSE, #no row names
  strings_as_factors = TRUE) %>% #use factors
  rename("Subsector" = "TES Subsector") #rename variable

#Create dataframe with only categorization
GHG_categories <- GHG_UK22[, c(3,6:12)] %>%
  distinct() 

#IPCC code and TES subsectors - for categorization
GHG_IPCC_TES <- GHG_UK22[, c(3,6,7)] %>%
  distinct() %>%
  rename("IPCC_code" = "IPCC Code") #rename variable to match other df

# Map loop to download UK Regional data 1990-2021 from the UK National Atmospheric Emissions Inventory
downloaded2 <- file.exists("GHG_regions.xlsx") #checks if file is downloaded in working directory
if(downloaded2 != T){ #if downloaded is not true
  map2("https://uk-air.defra.gov.uk/reports/cat09/2306200930_DA_GHGI_1990-2021_Final_v3.1.xlsx", #update this link when new data available
                        "GHG_regions.xlsx", download.file)} else{print('data downloaded')} #name and download or print
## [1] "data downloaded"
#Read in excel file, by source, with region
GHG_regions <- read_xlsx(
  "GHG_regions.xlsx", #excel file from current working directory
  sheet = "BySource_data", #define tab/sheet to read
  col_names = TRUE, #use header row for column names
  col_types = NULL, #guess data types
  na = "", #treat blank cells as NA
  trim_ws = TRUE, #trim any whitespace/unused columns and rows
  skip = 6, #skip 6 rows to get to pivot table data
  n_max = Inf, #set maximum number of rows to include
  guess_max = min(1000, Inf), #how many rows to use to guess data types
  progress = readxl_progress(), #display progress in reading in data
  .name_repair = "unique" #makes sure all column names not empty or duplicated
)

#Add subsector into GHG_regions, match format 
GHG_regions2 <- inner_join(GHG_IPCC_TES, GHG_regions,
    by='IPCC_code') %>% #use inner join to combine dataframes by a shared variable
  rename("GHG" = "Pollutant") %>% #rename variable to match UK only csv
  rename("NC_Sector" = "NCFormat") %>% #rename variable
  rename("TES_Sector" = "TES Sector") #rename variable

#Simplify regional dataset
GHG_simp <- GHG_regions2 %>%
  group_by(EmissionYear, TES_Sector, Subsector, RegionName, GHG) %>%
  summarize(Temissions = sum(Emission, na.rm = TRUE)) %>%
  rename("Sector" = "TES_Sector")
## `summarise()` has grouped output by 'EmissionYear', 'TES_Sector', 'Subsector',
## 'RegionName'. You can override using the `.groups` argument.
#2021 only
GHG_2021 <- GHG_simp %>%
    filter(EmissionYear == "2021") #filter to only 2021

#Grouped into all of UK
GHG_2021_joined <- GHG_2021 %>%
  group_by(Sector, Subsector, GHG) %>%
  summarize(Temissions = sum(Temissions, na.rm = TRUE))
## `summarise()` has grouped output by 'Sector', 'Subsector'. You can override
## using the `.groups` argument.

Visualization at High Level

#Define custom color palette
colors <- paste(c('#543005','#8c510a','#bf812d','#dfc27d','#f6e8c3','#f5f5f5','#c7eae5','#80cdc1','#35978f','#01665e',
                  '#003c30','#40004b','#762a83','#9970ab','#c2a5cf','#e7d4e8','#f7f7f7','#d9f0d3','#a6dba0','#5aae61',
                  '#1b7837','#00441b','#67001f','#b2182b','#d6604d','#f4a582','#fddbc7','#f7f7f7','#d1e5f0','#92c5de',
                  '#4393c3','#2166ac','#053061','#c51b7d','#de77ae','#f1b6da','#fde0ef','#e6f5d0','#b8e186','#7fbc41',
                  '#4d9221'), collapse = '", "')

colorJS <- paste('d3.scaleOrdinal(["', colors, '"])') #format to call Javascript D3

Sankey Diagram

#Set up Sankey links dataframe - from sector to subsector to GHGs
links <- data.frame(source=c(paste0(GHG_2021_joined$Sector), paste0(GHG_2021_joined$Subsector)), 
                    target=c(paste0(GHG_2021_joined$Subsector), paste0(GHG_2021_joined$GHG)),
                    value=as.numeric(paste0(GHG_2021_joined$Temissions)))

links <- links[-c(83:85),] #remove instances with repeat name

#Create nodes df from names in links df
nodes <- data.frame(
  name=unique(c(as.character(links$source), 
  as.character(links$target))))

#Add ID numbers
links$IDsource <- as.numeric(match(links$source, nodes$name)-1)
links$IDtarget <- as.numeric(match(links$target, nodes$name)-1)

#Set up Sankey links dataframe - breakdown from GHGs to sectors to subsectors
links2 <- data.frame(source=c(paste0(GHG_2021_joined$GHG), paste0(GHG_2021_joined$Sector)), 
                    target=c(paste0(GHG_2021_joined$Sector), paste0(GHG_2021_joined$Subsector)),
                    value=as.numeric(paste0(GHG_2021_joined$Temissions))) 

links2 <- links2[-c(168:170),] #remove instances with repeat name

#Create nodes df from names in links df
nodes2 <- data.frame(
  name=unique(c(as.character(links2$source), 
  as.character(links2$target))))

#Add ID numbers
links2$IDsource <- as.numeric(match(links2$source, nodes2$name)-1)
links2$IDtarget <- as.numeric(match(links2$target, nodes2$name)-1)

#Plot with networkD3
sankey <- sankeyNetwork(Links = links2, Nodes = nodes2, #dataframes to use
              Source = "IDsource", Target = "IDtarget", #numeric identifiers of variables
              Value = "value", #numeric weights
                            NodeID = "name", #node labels
              units = "kt CO2 eq.", #label units
                            colourScale = colorJS,
                            fontSize = 7,
              sinksRight=FALSE, #justify last variables right if true
                            nodePadding = 8, #vertical space between nodes
              iterations = 10) #number of times for it to try to find the best order and format

sankey #view plot

Sankey diagram demonstrating proportional breakdown of greenhouse gas emissions in the UK in 2021. From left to right, greenhouse gas by sector by subsector. Proportions based on kilotonnes of CO2 equivalents.

#Plot with Plotly
sankey3 <- plot_ly(type = "sankey",
                   domain = list(x =  c(0,1),y =  c(0,1)),
                   orientation = "h",
                   valueformat = ".0f",
                   valuesuffix = "kt CO2 eq.",
      node = list(
        label = nodes$name,
        pad = 15,
        thickness = 20,
        line = list(color = "black", width = 0.5),
      link = list(
        source = links$IDsource,
        target = links$IDtarget,
        value = links$value
        #label = linklist$label
        ))) %>% 
  layout(
    title = "Greenhouse Gas Emissions Per Sector in the UK",
    font = list(size = 10),
    xaxis = list(showgrid = F, zeroline = F),
    yaxis = list(showgrid = F, zeroline = F))

sankey3

Time Series Graph of Emissions by Sector

#Plot time series
Sect_time <- GHG_regions2 %>%
  group_by(EmissionYear, TES_Sector) %>%
  summarize(Temissions = sum(Emission, na.rm = TRUE)) %>%
  rename("Sector" = "TES_Sector") %>%
ggplot(aes(x=as.numeric(EmissionYear), y=Temissions, color=Sector)) +
  geom_line() +
  xlab("") + ylab("Greenhouse Gas Emissions (kilotonnes CO2 eq.)") +
  ggtitle("Greenhouse Gas Emissions of the UK per Sector Over Time") +
  theme_few() +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(breaks = pretty(c(1990:2020), n=6)) +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position="bottom")
## `summarise()` has grouped output by 'EmissionYear'. You can override using the
## `.groups` argument.
#Sect_time #view ggplot

#Make interactive with Plotly
t_int <- ggplotly(Sect_time) %>% #convert to interactive graph
  layout(legend = list(
      orientation = "h"))
  
#Plotly customization
style(t_int, hovertemplate="Year: %{x:,.r}
CO2eq: %{y:,.4r} kt") #define what hover label says, round x values, round y values to 4 sig. figs

Greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK over time, by sector.

Sector and Subsector Summaries

ag_df <- GHG_regions2 %>%
  filter(TES_Sector == "Agriculture") %>% #filter sector
  filter(EmissionYear == "2021") %>% #filter to only 2021 
  group_by(Subsector, IPCC_name, GHG, IPCC_code) %>%
  summarize(Temissions = sum(Emission, na.rm = TRUE))
## `summarise()` has grouped output by 'Subsector', 'IPCC_name', 'GHG'. You can
## override using the `.groups` argument.
ag_df$IPCC_name <- gsub('_', ' ', ag_df$IPCC_name) #replace underscores with spaces to make nicer to read

Agriculture

#Set up Sankey links dataframe
aglinks <- data.frame(source=c(paste0(ag_df$GHG), paste0(ag_df$Subsector)), 
                    target=c(paste0(ag_df$Subsector), paste0(ag_df$IPCC_name)),
                    value=as.numeric(paste0(ag_df$Temissions)))

#Create nodes df from names in links df
agnodes <- data.frame(
  name=unique(c(as.character(aglinks$source), 
  as.character(aglinks$target))))

#Add ID numbers
aglinks$IDsource <- as.numeric(match(aglinks$source, agnodes$name)-1)
aglinks$IDtarget <- as.numeric(match(aglinks$target, agnodes$name)-1)


#Plot with networkD3
agsankey <- sankeyNetwork(Links = aglinks, Nodes = agnodes,
              Source = "IDsource", Target = "IDtarget",
              Value = "value", NodeID = "name",
              units = "kilotonnes CO2 eq.",
              sinksRight=FALSE, nodePadding = 8,
              iterations = 5)

agsankey

Sankey diagram demonstrating breakdown of greenhouse gas emissions in the UK’s agricultural sector in 2021. From left to right, greenhouse gas by subsector by individual source, following IPCC source names.

#Bar plot by subsector, stacked by GHG
ggplot(ag_df, aes(x = Subsector, y = Temissions, fill = GHG)) + 
  geom_col() + 
  theme_few() + #remove grid, white background
  scale_fill_manual(values = wes_palette("GrandBudapest1")) +
  xlab("Subsector") + ylab("Emissions (kt CO2 eq.)") #define/change X and Y labels
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK's agricultural sector in 2021 by subsector, colored by greenhouse gas identity.

Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK’s agricultural sector in 2021 by subsector, colored by greenhouse gas identity.

#Will make donut or pie chart here
#Bar plot by source
ggplot(ag_df, aes(x = IPCC_name, y = Temissions, fill = Subsector)) + 
  geom_bar(stat = "identity", width = 0.7, position = "dodge") + 
  theme_few() + #remove grid, white background
  scale_fill_manual(values = wes_palette("GrandBudapest1")) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  ggtitle("Agriculture emissions in 2021") + #add a graph title
  xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
  theme(legend.position="top", legend.box = "horizontal",
        text=element_text(size=10, colour = "black"), 
        axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK's agricultural sector in 2021 by source, colored by subsector.

Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK’s agricultural sector in 2021 by source, colored by subsector.

#Bar plot by source, facet by GHG
ggplot(ag_df, aes(x = IPCC_code, y = Temissions, fill = Subsector)) + 
  geom_bar(stat = "identity", width = 0.7, position = "dodge") + facet_grid(~ GHG) +
  theme_few() + #remove grid, white background
  scale_fill_manual(values = wes_palette("GrandBudapest1")) +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  ggtitle("Agriculture emissions in 2021") + #add a graph title
  xlab("Source IPCC Code") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
  theme(text=element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK's agricultural sector in 2021 by IPCC code, colored by subsector and grouped by greenhouse gas identity.

Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK’s agricultural sector in 2021 by IPCC code, colored by subsector and grouped by greenhouse gas identity.

#Bar plot combustion, stacked by GHG
ag_df %>%
  filter(Subsector == "Agricultural combustion") %>%
ggplot(aes(x = IPCC_name, y = Temissions, fill = GHG)) + 
  geom_col() + 
  theme_few() + #remove grid, white background
  scale_fill_manual(values = wes_palette("GrandBudapest1")) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  ggtitle("Subsector emissions in 2021") + #add a graph title
  xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
  theme(legend.position="left", legend.box = "vertical", 
        axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural combustion subsector in 2021 by source, colored by greenhouse gas identity.

Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural combustion subsector in 2021 by source, colored by greenhouse gas identity.

#Bar plot soils, stacked by GHG
ag_df %>%
  filter(Subsector == "Agricultural soils") %>%
ggplot(aes(x = IPCC_name, y = Temissions, fill = GHG)) + 
  geom_col() + 
  theme_few() + #remove grid, white background
  scale_fill_manual(values = wes_palette("GrandBudapest1")) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  ggtitle("Subsector emissions in 2021") + #add a graph title
  xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
  theme(legend.position="left", legend.box = "vertical", 
        axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural soils subsector in 2021 by source, colored by greenhouse gas identity.

Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural soils subsector in 2021 by source, colored by greenhouse gas identity.

#Bar plot livestock, stacked by GHG
ag_df %>%
  filter(Subsector == "Livestock") %>%
ggplot(aes(x = IPCC_name, y = Temissions, fill = GHG)) + 
  geom_col() + 
  theme_few() + #remove grid, white background
  scale_fill_manual(values = wes_palette("GrandBudapest1")) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  ggtitle("Subsector emissions in 2021") + #add a graph title
  xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
  theme(legend.position="left", legend.box = "vertical", 
        axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the livestock subsector in 2021 by source, colored by greenhouse gas identity.

Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the livestock subsector in 2021 by source, colored by greenhouse gas identity.

#Bar plot other ag, stacked by GHG
ag_df %>%
  filter(Subsector == "Other agriculture") %>%
ggplot(aes(x = IPCC_name, y = Temissions, fill = GHG)) + 
  geom_col() + 
  theme_few() + #remove grid, white background
  scale_fill_manual(values = wes_palette("GrandBudapest1")) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  ggtitle("Subsector emissions in 2021") + #add a graph title
  xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
  theme(legend.position="left", legend.box = "vertical", 
        axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural 'other' subsector in 2021 by source, colored by greenhouse gas identity.

Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural ‘other’ subsector in 2021 by source, colored by greenhouse gas identity.